pacman::p_load(ggplot2, dplyr, caret, tidyr,arules, arulesViz, RColorBrewer, caTools )

# install.packages("arules")
# install.packages("arulesViz")
# library("arules")
# library("arulesViz")



# Load the transaction data (basket format: one comma-separated transaction
# per line).
# The inspect() output recorded further down lists the first transaction as
# {items}, i.e. the first line of the file is a header that was being read as
# a bogus one-item transaction. header = TRUE makes read.transactions() skip
# that line.
# NOTE: the "##" outputs recorded in this script were captured with
# header = FALSE, so their counts still include that extra transaction.
tr <- read.transactions(
  "~/Documents/@/UBIQUM/DATAML/Week7/Market_Basket_Analysis/MarketBasketAnalysis/DATASETS/ElectronidexTransactions.csv",
  format = "basket",
  sep = ",",
  header = TRUE
)




#Convert dataset as transaction object
#trObj<-as(tr,"transactions")

# Basic overview of the transactions object
# How many transactions were read?
length(tr)
## [1] 10454
# Auto-print the object: number of transactions and items
tr
## transactions in sparse format with
##  10454 transactions (rows) and
##  4248 items (columns)
# Density, most frequent items and transaction-size distribution
summary(object = tr)
## transactions as itemMatrix in sparse format with
##  10454 rows (elements/itemsets/transactions) and
##  4248 columns (items) and a density of 0.0005853147 
## 
## most frequent items:
## APP0692 APP1184 SAM0068 APP1208 WDT0177 (Other) 
##     290     283     214     210     185   24811 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   13 
##    2 7197 2138  700  260   89   30   18   12    3    4    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   2.000   2.486   3.000  13.000 
## 
## includes extended item information - examples:
##    labels
## 1 8MO0001
## 2 8MO0002
## 3 8MO0003
# Item matrix behind the transactions
items(x = tr)
## itemMatrix in sparse format with
##  10454 rows (elements/transactions) and
##  4248 columns (items)
# Visualize the data: item frequency bar plot with default settings
itemFrequencyPlot(x = tr)

# The image() calls below are not working, so they are left commented out
#image(tr[1:500])
#image(sample(tr, 2000))

# Are there any duplicates?
#duplicated(tr) 




# Print the first 100 transactions
inspect(tr[seq_len(100)])
##       items                                    
## [1]   {items}                                  
## [2]   {IFX0014,IFX0049,MOP0083,TUC0302}        
## [3]   {APP1130,CRU0045,OWC0048,SEA0038}        
## [4]   {APP0405,APP1208}                        
## [5]   {APP0432,APP1457,APP2487,APP2552,BOS0059}
## [6]   {APP1208,APP1459,ELA0004,LAC0221,LGE0038}
## [7]   {APP1916,APP2498}                        
## [8]   {ALL0004,APP2523,LAC0166,SAN0150,WOE0002}
## [9]   {BEZ0209,SAT0024}                        
## [10]  {APP1037,LAC0176,SAT0003}                
## [11]  {EVU0003,SAN0039}                        
## [12]  {CAD0005,PAC2070}                        
## [13]  {SEA0033,SEA0043}                        
## [14]  {APP2481,APP2513}                        
## [15]  {APP1859,OWC0216,OWC0226}                
## [16]  {OWC0035-2,OWC0036}                      
## [17]  {BEL0183,ELA0021}                        
## [18]  {FCM0010,SAN0106}                        
## [19]  {KIN0144,LAC0225}                        
## [20]  {PHI0063,WDT0135}                        
## [21]  {AP20299,OWC0002}                        
## [22]  {LEX0010,PAC1494}                        
## [23]  {PHI0057,PHI0066}                        
## [24]  {BEL0263,LAC0206}                        
## [25]  {BEL0223,KIN0115,MOS0059,MOS0148}        
## [26]  {APP2477,IOT0012,OTT0156,XDO0033}        
## [27]  {APP0696,NTE0015}                        
## [28]  {APP0431,IOT0008,IOT0020}                
## [29]  {APP0692,BNQ0036-A,SAN0116-A}            
## [30]  {APP1565,LAC0176,OWC0184}                
## [31]  {APP0656,CAD0005,PAC2105}                
## [32]  {OWC0001,SEA0096}                        
## [33]  {APP2323,SAT0023}                        
## [34]  {APP1204,KIN0153-2}                      
## [35]  {SAN0139,STA0043,STA0046}                
## [36]  {APP0660,APP1565,IFX0131,IFX0163}        
## [37]  {OWC0165,PAC1493}                        
## [38]  {GRT0394,THU0016}                        
## [39]  {APP1459,MOS0176}                        
## [40]  {NTE0007,NTE0020,OWC0094,PAC0748}        
## [41]  {APP2058,LMP0021}                        
## [42]  {BEL0236,GRT0438}                        
## [43]  {MOX0020,SAN0093}                        
## [44]  {MUV0162,XOO0005,XOO0006}                
## [45]  {IFX0010,OWC0036,SAM0068,SEA0097}        
## [46]  {APP0660,APP1566}                        
## [47]  {APP0656,PAC1588}                        
## [48]  {KIN0150,NTE0007,OWC0094,SAM0069}        
## [49]  {SYN0133,WDT0177}                        
## [50]  {APP0404,BEL0223,DLK0072,LAC0199,PAC0486}
## [51]  {APP0017,APP1208}                        
## [52]  {SYN0140,WDT0177}                        
## [53]  {SYN0121,WDT0177}                        
## [54]  {WAC0034,WAC0185}                        
## [55]  {APP0695,SPE0155}                        
## [56]  {KEN0206,SAN0106}                        
## [57]  {SAN0084,THU0019}                        
## [58]  {PHI0066,PHI0073}                        
## [59]  {GRT0355,OTT0133}                        
## [60]  {APP0692,GRT0369}                        
## [61]  {LIF0089,SAT0008}                        
## [62]  {PHI0070,SAT0044}                        
## [63]  {SAN0093,SEA0100}                        
## [64]  {OWC0018,OWC0086,SAM0068}                
## [65]  {IFX0015,IFX0039,IFX0073}                
## [66]  {SAM0067,SYN0121}                        
## [67]  {IFX0039,TRK0003}                        
## [68]  {SYN0122,WDT0135}                        
## [69]  {IFX0144,NTE0038,OWC0040-2,PAC1498}      
## [70]  {SYN0180,WDT0177}                        
## [71]  {APP1639,APP1641}                        
## [72]  {APP1205,APP1669}                        
## [73]  {MOX0024,SAT0015}                        
## [74]  {OWC0001,OWC0037-2,SAM0063,SEA0096}      
## [75]  {KAN0021,NTE0006}                        
## [76]  {PHI0055,WOE0005,WOE0006}                
## [77]  {ELG0032,ELG0034}                        
## [78]  {HTE0002,TIG0018}                        
## [79]  {BEL0165,TPL0032}                        
## [80]  {APP0921,MOS0059,MUV0145}                
## [81]  {ELG0042,PAC1394,SAT0005,SAT0028,SAT0037}
## [82]  {MOX0009,SPE0162}                        
## [83]  {APP0692,OTT0130,SAT0017}                
## [84]  {HTE0003,WIT0024}                        
## [85]  {IFX0014,TRA0004}                        
## [86]  {OWC0181-2,PAC0617}                      
## [87]  {APP0921,TPL0032}                        
## [88]  {LIF0070,LIF0072,LIF0099}                
## [89]  {BEL0292,MOS0204}                        
## [90]  {GRT0421,IFX0013,MOX0012}                
## [91]  {SAN0127,SAN0128}                        
## [92]  {PAC1729,TOS0014}                        
## [93]  {KAN0022,OWC0036-2}                      
## [94]  {APP1146,WOE0002}                        
## [95]  {APP1041,APP1915,WIT0026}                
## [96]  {APP0921,BEL0206,SPE0135}                
## [97]  {HTE0004,PAR0044,TAM0006}                
## [98]  {HTE0001,HTE0003}                        
## [99]  {BEL0280,MUV0162,OTT0153}                
## [100] {OWC0142,SEA0038}
# Absolute item frequency:
# number of occurrences of each of the 20 most frequent items
itemFrequencyPlot(
  tr,
  topN = 20,
  type = "absolute",
  col = brewer.pal(8, "Pastel2"),
  main = "Absolute Item Frequency Plot"
)

# Relative item frequency:
# how often each of the 20 most frequent items appears compared to the others
# (APP0692 and APP1184 have the most sales)
itemFrequencyPlot(
  tr,
  topN = 20,
  type = "relative",
  col = brewer.pal(8, "Pastel2"),
  main = "Relative Item Frequency Plot"
)

# Evaluating model performance

# First run: minimum support 0.0005, minimum confidence 0.8
associationrules <- apriori(
  data = tr,
  parameter = list(supp = 0.0005, conf = 0.8)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5   5e-04      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 5 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1125 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# List the mined rules together with their quality measures
inspect(x = associationrules)
##     lhs          rhs       support      confidence lift       count
## [1] {OWC0056} => {OWC0054} 0.0005739430 1.0000000   871.16667  6   
## [2] {WAC0156} => {WAC0158} 0.0006696002 1.0000000  1493.42857  7   
## [3] {WAC0158} => {WAC0156} 0.0006696002 1.0000000  1493.42857  7   
## [4] {PRY0004} => {PRY0003} 0.0005739430 1.0000000   614.94118  6   
## [5] {NES0009} => {NES0006} 0.0006696002 1.0000000  1045.40000  7   
## [6] {APP1803} => {APP1215} 0.0016261718 0.8947368    65.40964 17
# Rule-set summary: 6 rules, all of them with two products
summary(object = associationrules)
## set of 6 rules
## 
## rule length distribution (lhs + rhs):sizes
## 2 
## 6 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support            confidence          lift             count       
##  Min.   :0.0005739   Min.   :0.8947   Min.   :  65.41   Min.   : 6.000  
##  1st Qu.:0.0005979   1st Qu.:1.0000   1st Qu.: 679.00   1st Qu.: 6.250  
##  Median :0.0006696   Median :1.0000   Median : 958.28   Median : 7.000  
##  Mean   :0.0007971   Mean   :0.9825   Mean   : 930.63   Mean   : 8.333  
##  3rd Qu.:0.0006696   3rd Qu.:1.0000   3rd Qu.:1381.42   3rd Qu.: 7.000  
##  Max.   :0.0016262   Max.   :1.0000   Max.   :1493.43   Max.   :17.000  
## 
## mining info:
##  data ntransactions support confidence
##    tr         10454   5e-04        0.8
# Second run: same minimum support (0.0005), minimum confidence lowered to 0.5
associationrules1 <- apriori(
  data = tr,
  parameter = list(supp = 0.0005, conf = 0.5)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5   5e-04      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 5 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1125 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [48 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Show every rule of the confidence-0.5 run with its quality measures
inspect(x = associationrules1)
##      lhs                  rhs       support      confidence lift      
## [1]  {OWC0056}         => {OWC0054} 0.0005739430 1.0000000   871.16667
## [2]  {OWC0054}         => {OWC0056} 0.0005739430 0.5000000   871.16667
## [3]  {WAC0156}         => {WAC0158} 0.0006696002 1.0000000  1493.42857
## [4]  {WAC0158}         => {WAC0156} 0.0006696002 1.0000000  1493.42857
## [5]  {PRY0004}         => {PRY0003} 0.0005739430 1.0000000   614.94118
## [6]  {NES0009}         => {NES0006} 0.0006696002 1.0000000  1045.40000
## [7]  {NES0006}         => {NES0009} 0.0006696002 0.7000000  1045.40000
## [8]  {PAC2115}         => {CAD0005} 0.0005739430 0.7500000   186.67857
## [9]  {APP2125}         => {APP1215} 0.0005739430 0.7500000    54.82867
## [10] {QNA0149}         => {WDT0177} 0.0005739430 0.6666667    37.67207
## [11] {SYN0174}         => {WDT0177} 0.0005739430 0.5454545    30.82260
## [12] {APP2114}         => {APP1215} 0.0005739430 0.6000000    43.86294
## [13] {APP1575}         => {LIBRO}   0.0007652573 0.6666667   142.23129
## [14] {APP1623}         => {APP1215} 0.0005739430 0.6666667    48.73660
## [15] {PAC2154}         => {APP0017} 0.0005739430 0.6000000   101.16774
## [16] {SSE0005}         => {SSE0004} 0.0006696002 0.6363636   604.77686
## [17] {SSE0004}         => {SSE0005} 0.0006696002 0.6363636   604.77686
## [18] {NEA0011}         => {NEA0004} 0.0005739430 0.7500000   871.16667
## [19] {NEA0004}         => {NEA0011} 0.0005739430 0.6666667   871.16667
## [20] {APP2113}         => {APP1215} 0.0006696002 0.7777778    56.85936
## [21] {SPH0014}         => {SPH0016} 0.0005739430 0.5454545   335.42246
## [22] {SPH0014}         => {SPH0015} 0.0006696002 0.6363636   332.62727
## [23] {IFX0087}         => {IFX0028} 0.0007652573 0.7272727   330.56126
## [24] {ALL0011}         => {ALL0002} 0.0006696002 0.5833333   203.27222
## [25] {PHI0054}         => {PHI0070} 0.0005739430 0.5454545    91.97067
## [26] {SNS0019}         => {SNS0014} 0.0007652573 0.5000000   193.59259
## [27] {APP2118}         => {APP1215} 0.0008609145 0.6428571    46.99600
## [28] {SPH0016}         => {SPH0015} 0.0010522288 0.6470588   338.21765
## [29] {SPH0015}         => {SPH0016} 0.0010522288 0.5500000   338.21765
## [30] {APP2486}         => {APP1184} 0.0008609145 0.6923077    25.57380
## [31] {QNA0210}         => {WDT0177} 0.0011478860 0.5714286    32.29035
## [32] {APP2480}         => {APP1184} 0.0007652573 0.5000000    18.46996
## [33] {SYN0180}         => {WDT0177} 0.0012435431 0.5652174    31.93937
## [34] {OWC0235-2}       => {NTE0007} 0.0006696002 0.5000000    29.20112
## [35] {OWC0104}         => {NTE0007} 0.0009565716 0.6250000    36.50140
## [36] {APP0428}         => {APP0432} 0.0009565716 0.5000000   121.55814
## [37] {APP2142}         => {APP1215} 0.0011478860 0.5454545    39.87540
## [38] {PHI0061}         => {PHI0062} 0.0009565716 0.5882353   323.65325
## [39] {PHI0062}         => {PHI0061} 0.0009565716 0.5263158   323.65325
## [40] {SYN0181}         => {WDT0177} 0.0013392003 0.5000000    28.25405
## [41] {APP1040}         => {APP1565} 0.0009565716 0.5555556   148.91738
## [42] {APP2485}         => {APP1184} 0.0010522288 0.5500000    20.31696
## [43] {APP2117}         => {APP1215} 0.0015305146 0.6666667    48.73660
## [44] {APP1914}         => {APP1041} 0.0011478860 0.5217391   165.28063
## [45] {APP1803}         => {APP1215} 0.0016261718 0.8947368    65.40964
## [46] {APP2155}         => {APP1215} 0.0025827434 0.5510204    40.28229
## [47] {APP0979}         => {APP0692} 0.0044958867 0.6103896    22.00349
## [48] {APP2117,APP2155} => {APP1215} 0.0005739430 0.5454545    39.87540
##      count
## [1]   6   
## [2]   6   
## [3]   7   
## [4]   7   
## [5]   6   
## [6]   7   
## [7]   7   
## [8]   6   
## [9]   6   
## [10]  6   
## [11]  6   
## [12]  6   
## [13]  8   
## [14]  6   
## [15]  6   
## [16]  7   
## [17]  7   
## [18]  6   
## [19]  6   
## [20]  7   
## [21]  6   
## [22]  7   
## [23]  8   
## [24]  7   
## [25]  6   
## [26]  8   
## [27]  9   
## [28] 11   
## [29] 11   
## [30]  9   
## [31] 12   
## [32]  8   
## [33] 13   
## [34]  7   
## [35] 10   
## [36] 10   
## [37] 12   
## [38] 10   
## [39] 10   
## [40] 14   
## [41] 10   
## [42] 11   
## [43] 16   
## [44] 12   
## [45] 17   
## [46] 27   
## [47] 47   
## [48]  6
#inspect(associationrules[1:6])
# Rule-set summary: 48 rules, 47 with two products and 1 with three
summary(object = associationrules1)
## set of 48 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3 
## 47  1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.021   2.000   3.000 
## 
## summary of quality measures:
##     support            confidence          lift             count       
##  Min.   :0.0005739   Min.   :0.5000   Min.   :  18.47   Min.   : 6.000  
##  1st Qu.:0.0005739   1st Qu.:0.5455   1st Qu.:  39.88   1st Qu.: 6.000  
##  Median :0.0006696   Median :0.6177   Median : 131.89   Median : 7.000  
##  Mean   :0.0009227   Mean   :0.6521   Mean   : 309.63   Mean   : 9.646  
##  3rd Qu.:0.0009805   3rd Qu.:0.6942   3rd Qu.: 338.22   3rd Qu.:10.250  
##  Max.   :0.0044959   Max.   :1.0000   Max.   :1493.43   Max.   :47.000  
## 
## mining info:
##  data ntransactions support confidence
##    tr         10454   5e-04        0.5
# Try different confidence values
# associationrules2 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6))
# inspect(associationrules2)
# associationrules3 <- apriori(tr, parameter = list(supp=0.0005, conf=0.7))
# inspect(associationrules3)
# associationrules4 <- apriori(tr, parameter = list(supp=0.0005, conf=0.9))
# inspect(associationrules4)
# associationrules5 <- apriori(tr, parameter = list(supp=0.0005, conf=1))
# inspect(associationrules5)

# Try different support values
# associationrulesS1 <- apriori(tr, parameter = list(supp=0.0001, conf=0.6)) 
# associationrulesS2 <- apriori(tr, parameter = list(supp=0.0003, conf=0.6)) 
# associationrulesS3 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6)) 
# associationrulesS4 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6)) 
# associationrulesS5 <- apriori(tr, parameter = list(supp=0.0007, conf=0.6)) 
# associationrulesS6 <- apriori(tr, parameter = list(supp=0.001, conf=0.6)) 
# associationrulesS7 <- apriori(tr, parameter = list(supp=0.00001, conf=0.6)) 
# inspect(associationrulesS7[1:100])
# summary(associationrulesS7)
# associationrulesS8 <- apriori(tr, parameter = list(supp=0.00009, conf=0.6)) 
# inspect(associationrulesS8[1:100])
# summary(associationrulesS8)


# Third run: minimum support 0.0003, minimum confidence 0.6 => 63 rules
associationrulesS2 <- apriori(
  data = tr,
  parameter = list(supp = 0.0003, conf = 0.6)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.6    0.1    1 none FALSE            TRUE       5   3e-04      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 3 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1596 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [63 rule(s)] done [0.01s].
## creating S4 object  ... done [0.00s].
# Rule-length distribution and quality-measure summary of the 0.0003 / 0.6 run
summary(object = associationrulesS2)
## set of 63 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3 
## 53 10 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.159   2.000   3.000 
## 
## summary of quality measures:
##     support            confidence          lift             count       
##  Min.   :0.0003826   Min.   :0.6000   Min.   :  22.00   Min.   : 4.000  
##  1st Qu.:0.0003826   1st Qu.:0.6667   1st Qu.:  55.07   1st Qu.: 4.000  
##  Median :0.0004783   Median :0.7143   Median : 186.68   Median : 5.000  
##  Mean   :0.0006195   Mean   :0.7664   Mean   : 422.32   Mean   : 6.476  
##  3rd Qu.:0.0006696   3rd Qu.:0.8167   3rd Qu.: 805.35   3rd Qu.: 7.000  
##  Max.   :0.0044959   Max.   :1.0000   Max.   :1493.43   Max.   :47.000  
## 
## mining info:
##  data ntransactions support confidence
##    tr         10454   3e-04        0.6
# First 10 rules in mining order. head() is used instead of [1:10] so the call
# still works when a support/confidence setting yields fewer than 10 rules
# (the confidence-0.8 run above produced only 6).
inspect(head(associationrulesS2, n = 10))
##      lhs          rhs       support      confidence lift      count
## [1]  {PRY0006} => {PRY0003} 0.0003826287 1.0         614.9412 4    
## [2]  {POL0007} => {POL0010} 0.0003826287 1.0        1161.5556 4    
## [3]  {REP0233} => {REP0232} 0.0003826287 0.8        1194.7429 4    
## [4]  {SNS0021} => {SNS0014} 0.0003826287 0.8         309.7481 4    
## [5]  {APP2158} => {APP1215} 0.0003826287 1.0          73.1049 4    
## [6]  {IFX0047} => {IFX0036} 0.0003826287 1.0         197.2453 4    
## [7]  {OWC0056} => {OWC0054} 0.0005739430 1.0         871.1667 6    
## [8]  {OWC0058} => {OWC0057} 0.0003826287 0.8         836.3200 4    
## [9]  {POL0008} => {POL0010} 0.0004782858 1.0        1161.5556 5    
## [10] {AKI0015} => {APP1913} 0.0003826287 0.8          66.3746 4
# Inspect results
# Top 25 rules by lift, highest first. head(..., by = ) sorts by the given
# measure in decreasing order and returns at most n rules, so unlike
# sort(...)[1:25] it does not fail when fewer than 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "lift"))
##      lhs                  rhs       support      confidence lift     
## [1]  {WAC0156}         => {WAC0158} 0.0006696002 1.0000000  1493.4286
## [2]  {WAC0158}         => {WAC0156} 0.0006696002 1.0000000  1493.4286
## [3]  {NEA0012}         => {NEA0008} 0.0003826287 1.0000000  1306.7500
## [4]  {REP0233}         => {REP0232} 0.0003826287 0.8000000  1194.7429
## [5]  {NTE0067}         => {NTE0068} 0.0003826287 0.8000000  1194.7429
## [6]  {POL0007}         => {POL0010} 0.0003826287 1.0000000  1161.5556
## [7]  {POL0008}         => {POL0010} 0.0004782858 1.0000000  1161.5556
## [8]  {NES0009}         => {NES0006} 0.0006696002 1.0000000  1045.4000
## [9]  {NES0006}         => {NES0009} 0.0006696002 0.7000000  1045.4000
## [10] {IHE0020}         => {IHE0017} 0.0004782858 0.8333333   967.9630
## [11] {IHE0019}         => {IHE0017} 0.0003826287 0.8000000   929.2444
## [12] {OWC0056}         => {OWC0054} 0.0005739430 1.0000000   871.1667
## [13] {NEA0009}         => {NEA0011} 0.0003826287 0.6666667   871.1667
## [14] {NEA0011}         => {NEA0004} 0.0005739430 0.7500000   871.1667
## [15] {NEA0004}         => {NEA0011} 0.0005739430 0.6666667   871.1667
## [16] {OWC0058}         => {OWC0057} 0.0003826287 0.8000000   836.3200
## [17] {NEA0009}         => {NEA0004} 0.0003826287 0.6666667   774.3704
## [18] {NEA0001}         => {NEA0004} 0.0004782858 0.6250000   725.9722
## [19] {PRY0006}         => {PRY0003} 0.0003826287 1.0000000   614.9412
## [20] {PRY0004}         => {PRY0003} 0.0005739430 1.0000000   614.9412
## [21] {SSE0005}         => {SSE0004} 0.0006696002 0.6363636   604.7769
## [22] {SSE0004}         => {SSE0005} 0.0006696002 0.6363636   604.7769
## [23] {SNS0010}         => {SNS0019} 0.0004782858 0.7142857   466.6964
## [24] {APP0657,SAT0048} => {MMW0010} 0.0004782858 0.7142857   439.2437
## [25] {ELA0019}         => {ELA0016} 0.0004782858 0.6250000   362.9861
##      count
## [1]  7    
## [2]  7    
## [3]  4    
## [4]  4    
## [5]  4    
## [6]  4    
## [7]  5    
## [8]  7    
## [9]  7    
## [10] 5    
## [11] 4    
## [12] 6    
## [13] 4    
## [14] 6    
## [15] 6    
## [16] 4    
## [17] 4    
## [18] 5    
## [19] 4    
## [20] 6    
## [21] 7    
## [22] 7    
## [23] 5    
## [24] 5    
## [25] 5
# Top 25 rules by support, highest first. head(..., by = ) sorts by the given
# measure in decreasing order and returns at most n rules, so unlike
# sort(...)[1:25] it does not fail when fewer than 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "support"))
##      lhs          rhs       support      confidence lift       count
## [1]  {APP0979} => {APP0692} 0.0044958867 0.6103896    22.00349 47   
## [2]  {APP1803} => {APP1215} 0.0016261718 0.8947368    65.40964 17   
## [3]  {APP2117} => {APP1215} 0.0015305146 0.6666667    48.73660 16   
## [4]  {SPH0016} => {SPH0015} 0.0010522288 0.6470588   338.21765 11   
## [5]  {OWC0104} => {NTE0007} 0.0009565716 0.6250000    36.50140 10   
## [6]  {APP2118} => {APP1215} 0.0008609145 0.6428571    46.99600  9   
## [7]  {APP2486} => {APP1184} 0.0008609145 0.6923077    25.57380  9   
## [8]  {APP1575} => {LIBRO}   0.0007652573 0.6666667   142.23129  8   
## [9]  {IFX0087} => {IFX0028} 0.0007652573 0.7272727   330.56126  8   
## [10] {WAC0156} => {WAC0158} 0.0006696002 1.0000000  1493.42857  7   
## [11] {WAC0158} => {WAC0156} 0.0006696002 1.0000000  1493.42857  7   
## [12] {NES0009} => {NES0006} 0.0006696002 1.0000000  1045.40000  7   
## [13] {NES0006} => {NES0009} 0.0006696002 0.7000000  1045.40000  7   
## [14] {SSE0005} => {SSE0004} 0.0006696002 0.6363636   604.77686  7   
## [15] {SSE0004} => {SSE0005} 0.0006696002 0.6363636   604.77686  7   
## [16] {SPH0014} => {SPH0015} 0.0006696002 0.6363636   332.62727  7   
## [17] {APP2113} => {APP1215} 0.0006696002 0.7777778    56.85936  7   
## [18] {OWC0056} => {OWC0054} 0.0005739430 1.0000000   871.16667  6   
## [19] {PRY0004} => {PRY0003} 0.0005739430 1.0000000   614.94118  6   
## [20] {PAC2115} => {CAD0005} 0.0005739430 0.7500000   186.67857  6   
## [21] {APP2125} => {APP1215} 0.0005739430 0.7500000    54.82867  6   
## [22] {QNA0149} => {WDT0177} 0.0005739430 0.6666667    37.67207  6   
## [23] {APP2114} => {APP1215} 0.0005739430 0.6000000    43.86294  6   
## [24] {APP1623} => {APP1215} 0.0005739430 0.6666667    48.73660  6   
## [25] {PAC2154} => {APP0017} 0.0005739430 0.6000000   101.16774  6
# Top 25 rules by confidence, highest first. head(..., by = ) sorts by the
# given measure in decreasing order and returns at most n rules, so unlike
# sort(...)[1:25] it does not fail when fewer than 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "confidence"))
##      lhs                  rhs       support      confidence lift      
## [1]  {PRY0006}         => {PRY0003} 0.0003826287 1.0000000   614.94118
## [2]  {POL0007}         => {POL0010} 0.0003826287 1.0000000  1161.55556
## [3]  {APP2158}         => {APP1215} 0.0003826287 1.0000000    73.10490
## [4]  {IFX0047}         => {IFX0036} 0.0003826287 1.0000000   197.24528
## [5]  {OWC0056}         => {OWC0054} 0.0005739430 1.0000000   871.16667
## [6]  {POL0008}         => {POL0010} 0.0004782858 1.0000000  1161.55556
## [7]  {WAC0156}         => {WAC0158} 0.0006696002 1.0000000  1493.42857
## [8]  {WAC0158}         => {WAC0156} 0.0006696002 1.0000000  1493.42857
## [9]  {APP1495}         => {APP1215} 0.0003826287 1.0000000    73.10490
## [10] {PRY0004}         => {PRY0003} 0.0005739430 1.0000000   614.94118
## [11] {NES0009}         => {NES0006} 0.0006696002 1.0000000  1045.40000
## [12] {NEA0012}         => {NEA0008} 0.0003826287 1.0000000  1306.75000
## [13] {APP1803,APP2113} => {APP1215} 0.0003826287 1.0000000    73.10490
## [14] {APP1803}         => {APP1215} 0.0016261718 0.8947368    65.40964
## [15] {IHE0020}         => {IHE0017} 0.0004782858 0.8333333   967.96296
## [16] {CRU0047,OWC0147} => {NTE0007} 0.0004782858 0.8333333    48.66853
## [17] {REP0233}         => {REP0232} 0.0003826287 0.8000000  1194.74286
## [18] {SNS0021}         => {SNS0014} 0.0003826287 0.8000000   309.74815
## [19] {OWC0058}         => {OWC0057} 0.0003826287 0.8000000   836.32000
## [20] {AKI0015}         => {APP1913} 0.0003826287 0.8000000    66.37460
## [21] {APP2111}         => {APP1215} 0.0003826287 0.8000000    58.48392
## [22] {NTE0067}         => {NTE0068} 0.0003826287 0.8000000  1194.74286
## [23] {IHE0019}         => {IHE0017} 0.0003826287 0.8000000   929.24444
## [24] {MIN0005,OTT0171} => {APP1184} 0.0003826287 0.8000000    29.55194
## [25] {APP1184,OTT0171} => {MIN0005} 0.0003826287 0.8000000   167.26400
##      count
## [1]   4   
## [2]   4   
## [3]   4   
## [4]   4   
## [5]   6   
## [6]   5   
## [7]   7   
## [8]   7   
## [9]   4   
## [10]  6   
## [11]  7   
## [12]  4   
## [13]  4   
## [14] 17   
## [15]  5   
## [16]  5   
## [17]  4   
## [18]  4   
## [19]  4   
## [20]  4   
## [21]  4   
## [22]  4   
## [23]  4   
## [24]  4   
## [25]  4
# Improving the model

# Removing redundant rules (rules that are subsets of larger rules)
# is.subset() compares every rule with every rule; each rule always matches
# itself, so a column sum above 1 marks a rule that is also matched by at
# least one other rule.
# NOTE(review): the recorded summaries show the maximum lift dropping from
# 1493.43 to 1306.75 after this step, i.e. both {WAC0156} => {WAC0158} and its
# reverse are dropped -- confirm that removing both directions is intended.
associationSubsetS2 <- which(
  colSums(is.subset(associationrulesS2, associationrulesS2)) > 1
) # positions of the flagged rules
length(associationSubsetS2) # 15
## [1] 15
# Select the positions to keep instead of negative indexing with
# -associationSubsetS2: when nothing is flagged, x[-integer(0)] would select
# zero rules instead of all of them.
associationrulesNosubsets <- associationrulesS2[
  setdiff(seq_len(length(associationrulesS2)), associationSubsetS2)
]
summary(associationrulesNosubsets) # 48 rules
## set of 48 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3 
## 45  3 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   2.000   2.062   2.000   3.000 
## 
## summary of quality measures:
##     support            confidence          lift             count       
##  Min.   :0.0003826   Min.   :0.6000   Min.   :  22.00   Min.   : 4.000  
##  1st Qu.:0.0003826   1st Qu.:0.6667   1st Qu.:  51.08   1st Qu.: 4.000  
##  Median :0.0004783   Median :0.7208   Median : 139.18   Median : 5.000  
##  Mean   :0.0006437   Mean   :0.7638   Mean   : 362.07   Mean   : 6.729  
##  3rd Qu.:0.0005739   3rd Qu.:0.8083   3rd Qu.: 642.70   3rd Qu.: 6.000  
##  Max.   :0.0044959   Max.   :1.0000   Max.   :1306.75   Max.   :47.000  
## 
## mining info:
##  data ntransactions support confidence
##    tr         10454   3e-04        0.6
# First 20 remaining rules. head() replaces [1:20] so the call also works when
# fewer than 20 rules are left after the filtering step.
inspect(head(associationrulesNosubsets, n = 20))
##      lhs          rhs       support      confidence lift       count
## [1]  {PRY0006} => {PRY0003} 0.0003826287 1.0000000   614.94118 4    
## [2]  {POL0007} => {POL0010} 0.0003826287 1.0000000  1161.55556 4    
## [3]  {REP0233} => {REP0232} 0.0003826287 0.8000000  1194.74286 4    
## [4]  {SNS0021} => {SNS0014} 0.0003826287 0.8000000   309.74815 4    
## [5]  {APP2158} => {APP1215} 0.0003826287 1.0000000    73.10490 4    
## [6]  {IFX0047} => {IFX0036} 0.0003826287 1.0000000   197.24528 4    
## [7]  {OWC0056} => {OWC0054} 0.0005739430 1.0000000   871.16667 6    
## [8]  {OWC0058} => {OWC0057} 0.0003826287 0.8000000   836.32000 4    
## [9]  {POL0008} => {POL0010} 0.0004782858 1.0000000  1161.55556 5    
## [10] {AKI0015} => {APP1913} 0.0003826287 0.8000000    66.37460 4    
## [11] {APP2111} => {APP1215} 0.0003826287 0.8000000    58.48392 4    
## [12] {APP1495} => {APP1215} 0.0003826287 1.0000000    73.10490 4    
## [13] {LAC0235} => {APP1913} 0.0003826287 0.6666667    55.31217 4    
## [14] {LAC0238} => {APP1913} 0.0004782858 0.6250000    51.85516 5    
## [15] {PRY0004} => {PRY0003} 0.0005739430 1.0000000   614.94118 6    
## [16] {NTE0067} => {NTE0068} 0.0003826287 0.8000000  1194.74286 4    
## [17] {PAC2111} => {CAD0005} 0.0003826287 0.6666667   165.93651 4    
## [18] {NEA0012} => {NEA0008} 0.0003826287 1.0000000  1306.75000 4    
## [19] {SNS0010} => {SNS0019} 0.0004782858 0.7142857   466.69643 5    
## [20] {PAC2115} => {CAD0005} 0.0005739430 0.7500000   186.67857 6
# Look for redundant rules => none found
# One logical flag per remaining rule; TRUE would mark a redundant rule
is.redundant(x = associationrulesNosubsets)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE
# Visualizations
# Default plot (scatter plot) of the first 20 rules.
# The former control = list(type = "items") was dropped: "type" is not a
# control parameter of this plot and only triggered an
# "Unknown control parameters: type" warning followed by a dump of all
# available control parameters.
# head() replaces [1:20] so the call also works with fewer than 20 rules.
plot(head(associationrulesNosubsets, n = 20))
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Graph plot of the first 20 rules.
# The former control = list(type = "items") was dropped: the graph method has
# no "type" control parameter and answered it with an
# "Unknown control parameters: type" warning followed by a dump of all
# available control parameters.
# head() replaces [1:20] so the call also works with fewer than 20 rules.
plot(head(associationrulesNosubsets, n = 20), method = "graph")

# Two-key plot of all remaining rules
plot(x = associationrulesNosubsets, method = "two-key plot")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Interactive plot of the first 20 rules.
# plotly_arules() is deprecated in arulesViz (it warned "Use 'plot' instead"),
# so the plotly engine of plot() is used.
# head() replaces [1:20] so the call also works with fewer than 20 rules.
plot(head(associationrulesNosubsets, n = 20), engine = "plotly")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Filtering rules

# Keep only the rules whose confidence exceeds 0.4
subRules <- associationrulesNosubsets[
  quality(associationrulesNosubsets)[["confidence"]] > 0.4
]
plot(x = subRules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Ten rules with the highest confidence, drawn as an interactive graph
top10subRules <- head(x = subRules, n = 10, by = "confidence")
plot(x = top10subRules, method = "graph", engine = "htmlwidget")